# -*- coding: utf-8 -*-
import scrapy
from douban.items import DoubanItem

# First page of the Douban Movie Top 250 chart; the spider's crawl starts here.
DOUBAN_MOVIE_TOP_250_URL = 'https://movie.douban.com/top250'


class DoubanSpiderSpider(scrapy.Spider):
    """Spider that scrapes the Douban Movie Top 250 chart.

    Every list entry on a chart page is emitted as a ``DoubanItem``; the
    spider then follows the page's "next" link until no such link exists.
    """

    # Spider name (used as ``scrapy crawl douban_spider``).
    name = 'douban_spider'
    # Domain restriction for followed requests.
    allowed_domains = ['movie.douban.com']
    # Entry URL.
    start_urls = [DOUBAN_MOVIE_TOP_250_URL]

    def parse(self, response):
        """Extract the movies on one chart page and schedule the next page.

        Args:
            response: The downloaded chart page.

        Yields:
            One populated ``DoubanItem`` per movie entry, followed by a
            ``scrapy.Request`` for the next page when the page links to one.
        """
        movie_list = response.xpath("//div[@class='article']//ol[@class='grid_view']/li")

        for movie in movie_list:
            douban_item = DoubanItem()
            douban_item['serial_number'] = movie.xpath(".//div[@class='item']//em/text()").extract_first()
            douban_item['movie_name'] = movie.xpath(".//div[@class='info']//div[@class='hd']/a/span[1]/text()").extract_first()

            # The first paragraph can hold several text nodes. Strip all
            # whitespace from each, drop the ones that end up blank, and keep
            # the last remaining one. The field is always assigned (None when
            # nothing is found) so every item has the same set of keys.
            raw_segments = movie.xpath(".//div[@class='info']//div[@class='bd']/p[1]/text()").extract()
            compacted = ["".join(segment.split()) for segment in raw_segments]
            non_blank = [segment for segment in compacted if segment]
            douban_item['introduction'] = non_blank[-1] if non_blank else None

            douban_item['star_level'] = movie.xpath(".//span[@class='rating_num']/text()").extract_first()
            douban_item['evaluate_number'] = movie.xpath(".//div[@class='star']//span[4]/text()").extract_first()
            douban_item['description'] = movie.xpath(".//p[@class='quote']/span/text()").extract_first()
            yield douban_item

        next_link = response.xpath("//span[@class='next']/link/@href").extract_first()

        if next_link:
            # Resolve the href against the current page URL instead of
            # concatenating strings, so query-only, path-relative and absolute
            # hrefs are all handled correctly.
            yield scrapy.Request(response.urljoin(next_link), callback=self.parse)

# 2019-02-11 18:15:31 [scrapy.core.scraper] DEBUG: Scraped from <200 https://movie.douban.com/top250>
# {'description': '希望让人自由。',
#  'evaluate_number': '1307799人评价',
#  'introduction': '1994/美国/犯罪剧情',
#  'movie_name': '肖申克的救赎',
#  'serial_number': '1',
#  'star_level': '9.6'}
# 2019-02-11 18:15:31 [scrapy.core.scraper] DEBUG: Scraped from <200 https://movie.douban.com/top250>
# {'description': '风华绝代。',
#  'evaluate_number': '965712人评价',
#  'introduction': '1993/中国大陆香港/剧情爱情同性',
#  'movie_name': '霸王别姬',
#  'serial_number': '2',
#  'star_level': '9.6'}
